/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * vim: set ts=8 sts=4 et sw=4 tw=99: */// Copyright 2011 the V8 project authors. All rights reserved.// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.// * Redistributions in binary form must reproduce the above// copyright notice, this list of conditions and the following// disclaimer in the documentation and/or other materials provided// with the distribution.// * Neither the name of Google Inc. nor the names of its// contributors may be used to endorse or promote products derived// from this software without specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.// A simple interpreter for the Irregexp byte code.#include"irregexp/RegExpBytecode.h"#include"irregexp/RegExpMacroAssembler.h"#include"vm/MatchPairs.h"#include"jscntxtinlines.h"usingnamespacejs;usingnamespacejs::irregexp;staticconstsize_tkBitsPerByte=8;staticconstsize_tkBitsPerByteLog2=3;classMOZ_STACK_CLASSRegExpStackCursor{public:explicitRegExpStackCursor(JSContext*cx):cx(cx),cursor(nullptr){}boolinit(){if(!stack.init()){ReportOutOfMemory(cx);returnfalse;}cursor=base();returntrue;}boolpush(int32_tvalue){*cursor++=value;if(cursor>=stack.limit()){int32_tpos=position();if(!stack.grow()){ReportOverRecursed(cx);returnfalse;}setPosition(pos);}returntrue;}int32_tpop(){MOZ_ASSERT(cursor>base());return*--cursor;}int32_tpeek(){MOZ_ASSERT(cursor>base());return*(cursor-1);}int32_tposition(){MOZ_ASSERT(cursor>=base());returncursor-base();}voidsetPosition(int32_tposition){cursor=base()+position;MOZ_ASSERT(cursor<stack.limit());}private:JSContext*cx;RegExpStackstack;int32_t*cursor;int32_t*base(){return(int32_t*)stack.base();}};staticint32_tLoad32Aligned(constuint8_t*pc){MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc)&3)==0);return*reinterpret_cast<constint32_t*>(pc);}staticint32_tLoad16Aligned(constuint8_t*pc){MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc)&1)==0);return*reinterpret_cast<constuint16_t*>(pc);}#define BYTECODE(name) case BC_##name:template<typenameCharT>RegExpRunStatusirregexp::InterpretCode(JSContext*cx,constuint8_t*byteCode,constCharT*chars,size_tcurrent,size_tlength,MatchPairs*matches,size_t*endIndex){constuint8_t*pc=byteCode;uint32_tcurrent_char=current?chars[current-1]:'\n';RegExpStackCursorstack(cx);if(!stack.init())returnRegExpRunStatus_Error;int32_tnumRegisters=Load32Aligned(pc);pc+=4;Vector<int32_t,0,SystemAllocPolicy>registers;if(!registers.growByUninitialized(numRegisters)){ReportOutOfMemory(cx);returnRegExpRunStatus_Error;}for(size_ti=0;i<(size_t)numRegisters;i++)registers[i]=-1;while(true){int32_tinsn=Load32Aligned(pc);switch(insn&BYTECODE_MASK){BYTECODE(BREAK)MOZ_CRASH("Bad bytecode: BREAK");BYTECODE(PUSH_CP)if(!stack.push(current))returnRegExpRunStatus_Error;pc+=BC_PUSH_CP_LENGTH;break;BYTECODE(PUSH_BT)if(!stack.push(Load32Aligned(pc+4)))returnRegExpRunStatus_Error;pc+=BC_PUSH_BT_LENGTH;break;BYTECODE(PUSH_REGISTER)if(!stack.push(registers[insn>>BYTECODE_SHIFT]))returnRegExpRunStatus_Error;pc+=BC_PUSH_REGISTER_LENGTH;break;BYTECODE(SET_REGISTER)registers[insn>>BYTECODE_SHIFT]=Load32Aligned(pc+4);pc+=BC_SET_REGISTER_LENGTH;break;BYTECODE(ADVANCE_REGISTER)registers[insn>>BYTECODE_SHIFT]+=Load32Aligned(pc+4);pc+=BC_ADVANCE_REGISTER_LENGTH;break;BYTECODE(SET_REGISTER_TO_CP)registers[insn>>BYTECODE_SHIFT]=current+Load32Aligned(pc+4);pc+=BC_SET_REGISTER_TO_CP_LENGTH;break;BYTECODE(SET_CP_TO_REGISTER)current=registers[insn>>BYTECODE_SHIFT];pc+=BC_SET_CP_TO_REGISTER_LENGTH;break;BYTECODE(SET_REGISTER_TO_SP)registers[insn>>BYTECODE_SHIFT]=stack.position();pc+=BC_SET_REGISTER_TO_SP_LENGTH;break;BYTECODE(SET_SP_TO_REGISTER)stack.setPosition(registers[insn>>BYTECODE_SHIFT]);pc+=BC_SET_SP_TO_REGISTER_LENGTH;break;BYTECODE(POP_CP)current=stack.pop();pc+=BC_POP_CP_LENGTH;break;BYTECODE(POP_BT)if(!CheckForInterrupt(cx))returnRegExpRunStatus_Error;pc=byteCode+stack.pop();break;BYTECODE(POP_REGISTER)registers[insn>>BYTECODE_SHIFT]=stack.pop();pc+=BC_POP_REGISTER_LENGTH;break;BYTECODE(FAIL)returnRegExpRunStatus_Success_NotFound;BYTECODE(SUCCEED)if(matches)memcpy(matches->pairsRaw(),registers.begin(),matches->length()*2*sizeof(int32_t));elseif(endIndex)*endIndex=registers[1];returnRegExpRunStatus_Success;BYTECODE(ADVANCE_CP)current+=insn>>BYTECODE_SHIFT;pc+=BC_ADVANCE_CP_LENGTH;break;BYTECODE(GOTO)pc=byteCode+Load32Aligned(pc+4);break;BYTECODE(ADVANCE_CP_AND_GOTO)current+=insn>>BYTECODE_SHIFT;pc=byteCode+Load32Aligned(pc+4);break;BYTECODE(CHECK_GREEDY)if((int32_t)current==stack.peek()){stack.pop();pc=byteCode+Load32Aligned(pc+4);}else{pc+=BC_CHECK_GREEDY_LENGTH;}break;BYTECODE(LOAD_CURRENT_CHAR){size_tpos=current+(insn>>BYTECODE_SHIFT);if(pos>=length){pc=byteCode+Load32Aligned(pc+4);}else{current_char=chars[pos];pc+=BC_LOAD_CURRENT_CHAR_LENGTH;}break;}BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED){intpos=current+(insn>>BYTECODE_SHIFT);current_char=chars[pos];pc+=BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;break;}BYTECODE(LOAD_2_CURRENT_CHARS){size_tpos=current+(insn>>BYTECODE_SHIFT);if(pos+2>length){pc=byteCode+Load32Aligned(pc+4);}else{CharTnext=chars[pos+1];current_char=(chars[pos]|(next<<(kBitsPerByte*sizeof(CharT))));pc+=BC_LOAD_2_CURRENT_CHARS_LENGTH;}break;}BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED){intpos=current+(insn>>BYTECODE_SHIFT);char16_tnext=chars[pos+1];current_char=(chars[pos]|(next<<(kBitsPerByte*sizeof(char16_t))));pc+=BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;break;}BYTECODE(LOAD_4_CURRENT_CHARS)MOZ_CRASH("ASCII handling implemented");BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED)MOZ_CRASH("ASCII handling implemented");BYTECODE(CHECK_4_CHARS){uint32_tc=Load32Aligned(pc+4);if(c==current_char)pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_CHECK_4_CHARS_LENGTH;break;}BYTECODE(CHECK_CHAR){uint32_tc=(insn>>BYTECODE_SHIFT);if(c==current_char)pc=byteCode+Load32Aligned(pc+4);elsepc+=BC_CHECK_CHAR_LENGTH;break;}BYTECODE(CHECK_NOT_4_CHARS){uint32_tc=Load32Aligned(pc+4);if(c!=current_char)pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_CHECK_NOT_4_CHARS_LENGTH;break;}BYTECODE(CHECK_NOT_CHAR){uint32_tc=(insn>>BYTECODE_SHIFT);if(c!=current_char)pc=byteCode+Load32Aligned(pc+4);elsepc+=BC_CHECK_NOT_CHAR_LENGTH;break;}BYTECODE(AND_CHECK_4_CHARS){uint32_tc=Load32Aligned(pc+4);if(c==(current_char&Load32Aligned(pc+8)))pc=byteCode+Load32Aligned(pc+12);elsepc+=BC_AND_CHECK_4_CHARS_LENGTH;break;}BYTECODE(AND_CHECK_CHAR){uint32_tc=(insn>>BYTECODE_SHIFT);if(c==(current_char&Load32Aligned(pc+4)))pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_AND_CHECK_CHAR_LENGTH;break;}BYTECODE(AND_CHECK_NOT_4_CHARS){uint32_tc=Load32Aligned(pc+4);if(c!=(current_char&Load32Aligned(pc+8)))pc=byteCode+Load32Aligned(pc+12);elsepc+=BC_AND_CHECK_NOT_4_CHARS_LENGTH;break;}BYTECODE(AND_CHECK_NOT_CHAR){uint32_tc=(insn>>BYTECODE_SHIFT);if(c!=(current_char&Load32Aligned(pc+4)))pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_AND_CHECK_NOT_CHAR_LENGTH;break;}BYTECODE(MINUS_AND_CHECK_NOT_CHAR){uint32_tc=(insn>>BYTECODE_SHIFT);uint32_tminus=Load16Aligned(pc+4);uint32_tmask=Load16Aligned(pc+6);if(c!=((current_char-minus)&mask))pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;break;}BYTECODE(CHECK_CHAR_IN_RANGE){uint32_tfrom=Load16Aligned(pc+4);uint32_tto=Load16Aligned(pc+6);if(from<=current_char&¤t_char<=to)pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_CHECK_CHAR_IN_RANGE_LENGTH;break;}BYTECODE(CHECK_CHAR_NOT_IN_RANGE){uint32_tfrom=Load16Aligned(pc+4);uint32_tto=Load16Aligned(pc+6);if(from>current_char||current_char>to)pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;break;}BYTECODE(CHECK_BIT_IN_TABLE){intmask=RegExpMacroAssembler::kTableMask;uint8_tb=pc[8+((current_char&mask)>>kBitsPerByteLog2)];intbit=(current_char&(kBitsPerByte-1));if((b&(1<<bit))!=0)pc=byteCode+Load32Aligned(pc+4);elsepc+=BC_CHECK_BIT_IN_TABLE_LENGTH;break;}BYTECODE(CHECK_LT){uint32_tlimit=(insn>>BYTECODE_SHIFT);if(current_char<limit)pc=byteCode+Load32Aligned(pc+4);elsepc+=BC_CHECK_LT_LENGTH;break;}BYTECODE(CHECK_GT){uint32_tlimit=(insn>>BYTECODE_SHIFT);if(current_char>limit)pc=byteCode+Load32Aligned(pc+4);elsepc+=BC_CHECK_GT_LENGTH;break;}BYTECODE(CHECK_REGISTER_LT)if(registers[insn>>BYTECODE_SHIFT]<Load32Aligned(pc+4))pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_CHECK_REGISTER_LT_LENGTH;break;BYTECODE(CHECK_REGISTER_GE)if(registers[insn>>BYTECODE_SHIFT]>=Load32Aligned(pc+4))pc=byteCode+Load32Aligned(pc+8);elsepc+=BC_CHECK_REGISTER_GE_LENGTH;break;BYTECODE(CHECK_REGISTER_EQ_POS)if(registers[insn>>BYTECODE_SHIFT]==(int32_t)current)pc=byteCode+Load32Aligned(pc+4);elsepc+=BC_CHECK_REGISTER_EQ_POS_LENGTH;break;BYTECODE(CHECK_NOT_REGS_EQUAL)if(registers[insn>>BYTECODE_SHIFT]==registers[Load32Aligned(pc+4)])pc+=BC_CHECK_NOT_REGS_EQUAL_LENGTH;elsepc=byteCode+Load32Aligned(pc+8);break;BYTECODE(CHECK_NOT_BACK_REF){intfrom=registers[insn>>BYTECODE_SHIFT];intlen=registers[(insn>>BYTECODE_SHIFT)+1]-from;if(from<0||len<=0){pc+=BC_CHECK_NOT_BACK_REF_LENGTH;break;}if(current+len>length){pc=byteCode+Load32Aligned(pc+4);break;}else{inti;for(i=0;i<len;i++){if(chars[from+i]!=chars[current+i]){pc=byteCode+Load32Aligned(pc+4);break;}}if(i<len)break;current+=len;}pc+=BC_CHECK_NOT_BACK_REF_LENGTH;break;}BYTECODE(CHECK_NOT_BACK_REF_NO_CASE){intfrom=registers[insn>>BYTECODE_SHIFT];intlen=registers[(insn>>BYTECODE_SHIFT)+1]-from;if(from<0||len<=0){pc+=BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;break;}if(current+len>length){pc=byteCode+Load32Aligned(pc+4);break;}if(CaseInsensitiveCompareStrings(chars+from,chars+current,len*sizeof(CharT))){current+=len;pc+=BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;}else{pc=byteCode+Load32Aligned(pc+4);}break;}BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE){intfrom=registers[insn>>BYTECODE_SHIFT];intlen=registers[(insn>>BYTECODE_SHIFT)+1]-from;if(from<0||len<=0){pc+=BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;break;}if(current+len>length){pc=byteCode+Load32Aligned(pc+4);break;}if(CaseInsensitiveCompareUCStrings(chars+from,chars+current,len*sizeof(CharT))){current+=len;pc+=BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;}else{pc=byteCode+Load32Aligned(pc+4);}break;}BYTECODE(CHECK_AT_START)if(current==0)pc=byteCode+Load32Aligned(pc+4);elsepc+=BC_CHECK_AT_START_LENGTH;break;BYTECODE(CHECK_NOT_AT_START)if(current==0)pc+=BC_CHECK_NOT_AT_START_LENGTH;elsepc=byteCode+Load32Aligned(pc+4);break;BYTECODE(SET_CURRENT_POSITION_FROM_END){size_tby=static_cast<uint32_t>(insn)>>BYTECODE_SHIFT;if(length-current>by){current=length-by;current_char=chars[current-1];}pc+=BC_SET_CURRENT_POSITION_FROM_END_LENGTH;break;}default:MOZ_CRASH("Bad bytecode");}}}templateRegExpRunStatusirregexp::InterpretCode(JSContext*cx,constuint8_t*byteCode,constLatin1Char*chars,size_tcurrent,size_tlength,MatchPairs*matches,size_t*endIndex);templateRegExpRunStatusirregexp::InterpretCode(JSContext*cx,constuint8_t*byteCode,constchar16_t*chars,size_tcurrent,size_tlength,MatchPairs*matches,size_t*endIndex);